Datasource : https://catalog.data.gov/dataset/real-world-fuel-efficiency/resource/5668a995-6b32-45e9-b3a4-f45385695a06
# Load in some packages
import calendar
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
fuel_df = pd.read_csv(r"C:\Users\jki\Downloads\Real-World_Fuel_Efficiency.csv")
fuel_df
YEAR | MAKE | MODEL | Hybrid/Non-Hybrid | STANDARD TYPE | EPA RATING (CITY) | Vehicle Count | TOTAL ACTUAL MILES | TOTAL ACTUAL FUEL | EPA EXPECTED FUEL | ACTUAL FUEL ECONOMY Geotab | PERCENT % DIFFERENCE ACTUAL TO EPA | ESTIMATED FUEL COSTS PER GALLON | FUEL COSTS PER MILE | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2008 | HONDA | CIVIC GX | Non-Hybrid | SEDAN | 24 | 9 | 4459 | 221.0 | 185.79 | 20.18 | -0.16 | $ 552.50 | $ 0.12 |
1 | 2008 | FORD | CROWN VIC | Non-Hybrid | SEDAN | 11 | 7 | 17162 | 1771.0 | 1560.18 | 9.69 | -0.12 | $ 4,427.50 | $ 0.26 |
2 | 2008 | CHEVROLET | EXPRESS | Non-Hybrid | VAN | 13 | 1 | 413 | 65.0 | 31.77 | 6.35 | -0.51 | $ 162.50 | $ 0.39 |
3 | 2008 | FORD | FOCUS | Non-Hybrid | SEDAN | 24 | 49 | 13074 | 793.0 | 544.75 | 16.49 | -0.31 | $ 1,982.50 | $ 0.15 |
4 | 2008 | FORD | FUSION | Non-Hybrid | SEDAN | 18 | 5 | 29482 | 1813.0 | 1637.89 | 16.26 | -0.10 | $ 4,532.50 | $ 0.15 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
101 | 2019 | CHEVROLET | EXPRESS | Non-Hybrid | VAN | 13 | 22 | 10316 | 885.0 | 793.54 | 11.66 | -0.10 | $ 2,212.50 | $ 0.21 |
102 | 2019 | FORD | F150 | Non-Hybrid | PICKUP | 16 | 27 | 56410 | 5419.0 | 3525.63 | 10.41 | -0.35 | $ 13,547.50 | $ 0.24 |
103 | 2019 | FORD | F250 | Non-Hybrid | PICKUP | 12 | 30 | 84521 | 5449.0 | 7043.42 | 15.51 | 0.29 | $ 13,622.50 | $ 0.16 |
104 | 2019 | FREIGHTLINER | SPRINTER 3500 | Non-Hybrid | VAN | 15 | 38 | 3277 | 296.0 | 218.47 | 11.07 | -0.26 | $ 740.00 | $ 0.23 |
105 | 2019 | FORD | TRANSIT VAN | Non-Hybrid | VAN | 20 | 7 | 29068 | 2331.0 | 1453.40 | 12.47 | -0.38 | $ 5,827.50 | $ 0.20 |
106 rows × 14 columns
fuel_df.describe()
YEAR | EPA RATING (CITY) | Vehicle Count | TOTAL ACTUAL MILES | TOTAL ACTUAL FUEL | EPA EXPECTED FUEL | ACTUAL FUEL ECONOMY Geotab | PERCENT % DIFFERENCE ACTUAL TO EPA | |
---|---|---|---|---|---|---|---|---|
count | 106.000000 | 106.000000 | 106.000000 | 1.060000e+02 | 106.000000 | 106.000000 | 106.000000 | 106.000000 |
mean | 2013.698113 | 23.254717 | 38.320755 | 1.742588e+05 | 8470.372642 | 6903.022358 | 19.883491 | -0.166321 |
std | 3.165799 | 11.495806 | 51.139128 | 2.837817e+05 | 9212.284006 | 7641.761732 | 12.329095 | 0.226229 |
min | 2008.000000 | 9.000000 | 1.000000 | 4.130000e+02 | 65.000000 | 31.770000 | 5.350000 | -0.670000 |
25% | 2011.000000 | 15.000000 | 10.000000 | 3.217475e+04 | 1765.750000 | 1412.797500 | 10.545000 | -0.317500 |
50% | 2014.000000 | 19.000000 | 20.500000 | 8.400450e+04 | 5365.500000 | 4511.125000 | 14.930000 | -0.160000 |
75% | 2016.000000 | 28.000000 | 45.750000 | 1.800435e+05 | 11911.500000 | 9723.207500 | 26.090000 | -0.040000 |
max | 2019.000000 | 55.000000 | 327.000000 | 2.144639e+06 | 44874.000000 | 44679.980000 | 54.580000 | 0.730000 |
# lets check for missing
missing_values = fuel_df.isna().sum()
print(missing_values)
YEAR 0 MAKE 0 MODEL 0 Hybrid/Non-Hybrid 0 STANDARD TYPE 0 EPA RATING (CITY) 0 Vehicle Count 0 TOTAL ACTUAL MILES 0 TOTAL ACTUAL FUEL 0 EPA EXPECTED FUEL 0 ACTUAL FUEL ECONOMY Geotab 0 PERCENT % DIFFERENCE ACTUAL TO EPA 0 ESTIMATED FUEL COSTS PER GALLON 0 FUEL COSTS PER MILE 0 dtype: int64
# lets check on data types
fuel_df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 106 entries, 0 to 105 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 YEAR 106 non-null int64 1 MAKE 106 non-null object 2 MODEL 106 non-null object 3 Hybrid/Non-Hybrid 106 non-null object 4 STANDARD TYPE 106 non-null object 5 EPA RATING (CITY) 106 non-null int64 6 Vehicle Count 106 non-null int64 7 TOTAL ACTUAL MILES 106 non-null int64 8 TOTAL ACTUAL FUEL 106 non-null float64 9 EPA EXPECTED FUEL 106 non-null float64 10 ACTUAL FUEL ECONOMY Geotab 106 non-null float64 11 PERCENT % DIFFERENCE ACTUAL TO EPA 106 non-null float64 12 ESTIMATED FUEL COSTS PER GALLON 106 non-null object 13 FUEL COSTS PER MILE 106 non-null object dtypes: float64(4), int64(4), object(6) memory usage: 11.7+ KB
# Plot the Fuel Consumtion for each Car Model
car_make =fuel_df.groupby('MAKE').sum()['TOTAL ACTUAL FUEL']
# Sort the values in descending order and select the top Car Makes
top_ten_car_make= car_make.sort_values(ascending=False).head(10)
# Display the result
print(top_ten_car_make)
# Plot the top ten Countries
top_ten_car_make.plot(kind='bar', color='blue',figsize=(10, 6))
plt.title('Top Ten Car Make Fuel Consumption')
plt.xlabel('Car Make')
plt.ylabel('Fuel Consumption')
plt.show()
MAKE FORD 477336.5 TOYOTA 222598.0 CHEVROLET 183929.0 DODGE 5199.0 NISSAN 5037.0 GMC 3243.0 FREIGHTLINER 296.0 HONDA 221.0 Name: TOTAL ACTUAL FUEL, dtype: float64
Ford has the highest Fuel Consumption
# Plot the Fuel Consumtion for each Car Model
car_make =fuel_df.groupby('MAKE').sum()['TOTAL ACTUAL FUEL']
# Sort the values in descending order and select the top Car Makes
bottom_ten_car_make= car_make.sort_values(ascending=True).head(10)
# Display the result
print(bottom_ten_car_make)
# Plot the top ten Countries
bottom_ten_car_make.plot(kind='bar', color='blue',figsize=(10, 6))
plt.title('Top Ten Car Make Fuel Consumption')
plt.xlabel('Car Make')
plt.ylabel('Fuel Consumption')
plt.show()
MAKE HONDA 221.0 FREIGHTLINER 296.0 GMC 3243.0 NISSAN 5037.0 DODGE 5199.0 CHEVROLET 183929.0 TOYOTA 222598.0 FORD 477336.5 Name: TOTAL ACTUAL FUEL, dtype: float64
Honda Has the lowest Fuel Consumption
# Plot the Fuel Consumtion for each Car Model
car_model =fuel_df.groupby('MODEL').sum()['TOTAL ACTUAL FUEL']
# Sort the values in descending order and select the top Car Makes
top_ten_car_model= car_model.sort_values(ascending=False).head(10)
# Display the result
print(top_ten_car_model)
# Plot the top ten Countries
top_ten_car_model.plot(kind='bar', color='green',figsize=(10, 6))
plt.title('Top Ten Car Model Fuel Consumption')
plt.xlabel('Car Model')
plt.ylabel('Fuel Consumption')
plt.show()
MODEL PRIUS 145517.0 EXPRESS 118350.0 F150 99767.5 ESCAPE 89590.0 EXPLORER 41198.0 RAV4 HYBRID 40028.0 F250 38613.0 FUSION HYBRID 37974.0 TRANSIT CONNECT 27652.0 TAURUS 26022.0 Name: TOTAL ACTUAL FUEL, dtype: float64
PRIUS has the highest Fuel Consumption
# Plot the Fuel Consumtion for each Car Model
car_model =fuel_df.groupby('MODEL').sum()['TOTAL ACTUAL FUEL']
# Sort the values in descending order and select the top Car Makes
bottom_ten_car_model= car_model.sort_values(ascending=True).head(10)
# Display the result
print(bottom_ten_car_model)
# Plot the top ten Countries
bottom_ten_car_model.plot(kind='bar', color='green',figsize=(10, 6))
plt.title('Bottom Ten Car Model Fuel Consumption')
plt.xlabel('Car Model')
plt.ylabel('Fuel Consumption')
plt.show()
MODEL CIVIC GX 221.0 SPRINTER 3500 296.0 AVALON HYBRID 569.0 FOCUS 793.0 RANGER 845.0 CHARGER 1061.0 YUKON 1064.0 CROWN VIC 2061.0 YUKON XL 2179.0 SILVERADO HYBRID 4125.0 Name: TOTAL ACTUAL FUEL, dtype: float64
CIVIC GX has the lowest fuel consumption
# Plot the Fuel Cocumption for hybrid cars and non hybrid cars
hybrid_category =fuel_df.groupby('Hybrid/Non-Hybrid').sum()['TOTAL ACTUAL FUEL']
# Plot the pie chart
hybrid_category.plot(kind='pie', autopct='%1.1f%%', colors=['skyblue', 'lightgreen'], figsize=(8, 8))
plt.title('Fuel Consumption for Hybrid and Non-Hybrid Cars')
plt.ylabel('') # No need for ylabel in pie chart
plt.show()
Hybrid cars are highy fuel efficent with a significant difference from Non Hybrid Vehicles
# Plot the Fuel Consumtion for each Car Model
car_body_type =fuel_df.groupby('STANDARD TYPE').sum()['TOTAL ACTUAL FUEL']
# Sort the values in descending order and select the top Body type
top_ten_car_body_type= car_body_type.sort_values(ascending=False).head(10)
# Display the result
print(top_ten_car_body_type)
# Plot the top ten Countries
top_ten_car_body_type.plot(kind='bar', color='brown',figsize=(10, 6))
plt.title(' Car body type Fuel Consumption')
plt.xlabel('Car body type ')
plt.ylabel('Fuel Consumption')
plt.show()
STANDARD TYPE SUV 274136.0 SEDAN 250728.0 VAN 207768.0 PICKUP 165227.5 Name: TOTAL ACTUAL FUEL, dtype: float64
suv body types has the highest fuel consumption
Pickup body types has the lowest fuel consumption
fuel_df.head(5)
YEAR | MAKE | MODEL | Hybrid/Non-Hybrid | STANDARD TYPE | EPA RATING (CITY) | Vehicle Count | TOTAL ACTUAL MILES | TOTAL ACTUAL FUEL | EPA EXPECTED FUEL | ACTUAL FUEL ECONOMY Geotab | PERCENT % DIFFERENCE ACTUAL TO EPA | ESTIMATED FUEL COSTS PER GALLON | FUEL COSTS PER MILE | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2008 | HONDA | CIVIC GX | Non-Hybrid | SEDAN | 24 | 9 | 4459 | 221.0 | 185.79 | 20.18 | -0.16 | $ 552.50 | $ 0.12 |
1 | 2008 | FORD | CROWN VIC | Non-Hybrid | SEDAN | 11 | 7 | 17162 | 1771.0 | 1560.18 | 9.69 | -0.12 | $ 4,427.50 | $ 0.26 |
2 | 2008 | CHEVROLET | EXPRESS | Non-Hybrid | VAN | 13 | 1 | 413 | 65.0 | 31.77 | 6.35 | -0.51 | $ 162.50 | $ 0.39 |
3 | 2008 | FORD | FOCUS | Non-Hybrid | SEDAN | 24 | 49 | 13074 | 793.0 | 544.75 | 16.49 | -0.31 | $ 1,982.50 | $ 0.15 |
4 | 2008 | FORD | FUSION | Non-Hybrid | SEDAN | 18 | 5 | 29482 | 1813.0 | 1637.89 | 16.26 | -0.10 | $ 4,532.50 | $ 0.15 |
# Selecting data for Honda
Ford_data = fuel_df[fuel_df['MAKE'] == "FORD"]
# Extracting specific columns
Fordf1 = Ford_data ['Vehicle Count']
Fordf2 = Ford_data ['TOTAL ACTUAL FUEL']
# Checking if both conditions hold true
Ford_filtered_data = Ford_data[(Fordf1.notnull()) & (Fordf2.notnull())]
Ford_filtered_data.head(5)
YEAR | MAKE | MODEL | Hybrid/Non-Hybrid | STANDARD TYPE | EPA RATING (CITY) | Vehicle Count | TOTAL ACTUAL MILES | TOTAL ACTUAL FUEL | EPA EXPECTED FUEL | ACTUAL FUEL ECONOMY Geotab | PERCENT % DIFFERENCE ACTUAL TO EPA | ESTIMATED FUEL COSTS PER GALLON | FUEL COSTS PER MILE | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 2008 | FORD | CROWN VIC | Non-Hybrid | SEDAN | 11 | 7 | 17162 | 1771.0 | 1560.18 | 9.69 | -0.12 | $ 4,427.50 | $ 0.26 |
3 | 2008 | FORD | FOCUS | Non-Hybrid | SEDAN | 24 | 49 | 13074 | 793.0 | 544.75 | 16.49 | -0.31 | $ 1,982.50 | $ 0.15 |
4 | 2008 | FORD | FUSION | Non-Hybrid | SEDAN | 18 | 5 | 29482 | 1813.0 | 1637.89 | 16.26 | -0.10 | $ 4,532.50 | $ 0.15 |
7 | 2008 | FORD | RANGER | Non-Hybrid | PICKUP | 15 | 14 | 8820 | 845.0 | 588.00 | 10.44 | -0.30 | $ 2,112.50 | $ 0.24 |
9 | 2009 | FORD | CROWN VIC | Non-Hybrid | SEDAN | 16 | 39 | 3263 | 290.0 | 203.94 | 11.25 | -0.30 | $ 725.00 | $ 0.22 |
Ford_filtered_data.describe()
YEAR | EPA RATING (CITY) | Vehicle Count | TOTAL ACTUAL MILES | TOTAL ACTUAL FUEL | EPA EXPECTED FUEL | ACTUAL FUEL ECONOMY Geotab | PERCENT % DIFFERENCE ACTUAL TO EPA | |
---|---|---|---|---|---|---|---|---|
count | 55.00000 | 55.000000 | 55.000000 | 55.000000 | 55.000000 | 55.000000 | 55.000000 | 55.000000 |
mean | 2013.60000 | 19.981818 | 29.163636 | 124784.927273 | 8678.845455 | 6577.763636 | 15.788364 | -0.209818 |
std | 3.18329 | 8.154733 | 36.182031 | 140896.517633 | 9032.667358 | 6769.011186 | 8.346563 | 0.221138 |
min | 2008.00000 | 10.000000 | 5.000000 | 3263.000000 | 289.000000 | 203.940000 | 6.620000 | -0.560000 |
25% | 2011.00000 | 15.000000 | 9.000000 | 32502.000000 | 1897.000000 | 1449.540000 | 10.425000 | -0.380000 |
50% | 2014.00000 | 18.000000 | 16.000000 | 83488.000000 | 5681.000000 | 4538.200000 | 12.470000 | -0.210000 |
75% | 2016.00000 | 21.500000 | 30.500000 | 161921.500000 | 10707.750000 | 8714.580000 | 16.695000 | -0.090000 |
max | 2019.00000 | 43.000000 | 189.000000 | 689589.000000 | 40809.000000 | 32837.570000 | 41.460000 | 0.520000 |
Ford vehicle were mostly used between 2012 to 2014
import plotly.express as px
# Plot relationship between life expectancy and years
fig = px.bar(Ford_filtered_data, x='YEAR', y='TOTAL ACTUAL FUEL', height=320, labels={'TOTAL ACTUAL FUEL':'Fuel Consumption'}, title='Relationship between Fuel Consumption and Years - Ford')
fig.update_layout(xaxis_tickangle=-45) # Rotate x-axis labels for better readability
# Show the plot
fig.show()
Between 2016 and 2018 Ford vehicle had the highest Fuel Consumption
# Selecting data for Honda
PRIUS_data = fuel_df[fuel_df['MODEL'] == "PRIUS"]
# Extracting specific columns
PRIUSf1 = PRIUS_data['Vehicle Count']
PRIUSf2 = PRIUS_data['TOTAL ACTUAL FUEL']
# Checking if both conditions hold true
PRIUS_filtered_data = PRIUS_data[(PRIUSf1.notnull()) & (PRIUSf2.notnull())]
PRIUS_filtered_data.head(5)
YEAR | MAKE | MODEL | Hybrid/Non-Hybrid | STANDARD TYPE | EPA RATING (CITY) | Vehicle Count | TOTAL ACTUAL MILES | TOTAL ACTUAL FUEL | EPA EXPECTED FUEL | ACTUAL FUEL ECONOMY Geotab | PERCENT % DIFFERENCE ACTUAL TO EPA | ESTIMATED FUEL COSTS PER GALLON | FUEL COSTS PER MILE | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
6 | 2008 | TOYOTA | PRIUS | Hybrid | SEDAN | 48 | 189 | 1034445 | 25453.0 | 21550.94 | 40.64 | -0.15 | $ 63,632.50 | $ 0.06 |
13 | 2009 | TOYOTA | PRIUS | Hybrid | SEDAN | 48 | 86 | 341538 | 8249.0 | 7115.38 | 41.40 | -0.14 | $ 20,622.50 | $ 0.06 |
18 | 2010 | TOYOTA | PRIUS | Hybrid | SEDAN | 51 | 84 | 266257 | 5760.0 | 5220.73 | 46.23 | -0.09 | $ 14,400.00 | $ 0.05 |
26 | 2011 | TOYOTA | PRIUS | Hybrid | SEDAN | 49 | 86 | 374960 | 7968.0 | 7652.24 | 47.06 | -0.04 | $ 19,920.00 | $ 0.05 |
38 | 2012 | TOYOTA | PRIUS | Hybrid | SEDAN | 49 | 15 | 55592 | 1184.0 | 1134.53 | 46.95 | -0.04 | $ 2,960.00 | $ 0.05 |
import plotly.express as px
# Plot relationship between life expectancy and years
fig = px.bar(PRIUS_filtered_data, x='YEAR', y='TOTAL ACTUAL FUEL', height=320, labels={'TOTAL ACTUAL FUEL':'Fuel Consumption'}, title='Relationship between Fuel Consumption and Years - PRIUS')
fig.update_layout(xaxis_tickangle=-45) # Rotate x-axis labels for better readability
# Show the plot
fig.show()
Prius models had the highest fuel consumption between 2015 and 2016
# Selecting data for Honda
SUV_data = fuel_df[fuel_df['STANDARD TYPE'] == "SUV"]
# Extracting specific columns
SUVf1 = SUV_data['Vehicle Count']
SUVf2 = SUV_data['TOTAL ACTUAL FUEL']
# Checking if both conditions hold true
SUV_filtered_data = SUV_data[(SUVf1.notnull()) & (SUVf2.notnull())]
SUV_filtered_data.head(3)
YEAR | MAKE | MODEL | Hybrid/Non-Hybrid | STANDARD TYPE | EPA RATING (CITY) | Vehicle Count | TOTAL ACTUAL MILES | TOTAL ACTUAL FUEL | EPA EXPECTED FUEL | ACTUAL FUEL ECONOMY Geotab | PERCENT % DIFFERENCE ACTUAL TO EPA | ESTIMATED FUEL COSTS PER GALLON | FUEL COSTS PER MILE | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
5 | 2008 | TOYOTA | HIGHLANDER HYBRID | Hybrid | SUV | 27 | 15 | 36095 | 1667.0 | 1336.85 | 21.65 | -0.20 | $ 4,167.50 | $ 0.12 |
10 | 2009 | FORD | ESCAPE | Non-Hybrid | SUV | 19 | 14 | 3903 | 430.0 | 205.42 | 9.08 | -0.52 | $ 1,075.00 | $ 0.28 |
11 | 2009 | FORD | ESCAPE HYBRID | Hybrid | SUV | 34 | 32 | 180992 | 8729.0 | 5323.29 | 20.73 | -0.39 | $ 21,822.50 | $ 0.12 |
# Plot relationship between life expectancy and years
fig = px.bar(SUV_filtered_data, x='YEAR', y='TOTAL ACTUAL FUEL', height=320, labels={'TOTAL ACTUAL FUEL':'Fuel Consumption'}, title='SUV Fuel Consuption Histogram - SUVS')
fig.update_layout(xaxis_tickangle=-45) # Rotate x-axis labels for better readability
# Show the plot
fig.show()
SUVs had the highest fuel consumption between 2014 to 2016
1.Ford has the highest Fuel Consumption
2.HPRIUS has the highest Fuel Consumption
3.Honda Has the lowest Fuel Consumption
4.CIVIC GX has the lowest fuel consumption
5.Hybrid cars are highy fuel efficent with a significant difference from Non Hybrid Vehicles
6.suv body types has the highest fuel consumption
7.Pickup body types has the lowest fuel consumption
8.Between 2016 and 2018 Ford vehicle had the highest Fuel Consumption
9.Prius models had the highest fuel consumption between 2015 and 2016
10.SUVs had the highest fuel consumption between 2014 to 2016